library(titanic)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
## ✔ broom        1.0.2     ✔ rsample      1.1.1
## ✔ dials        1.1.0     ✔ tune         1.0.1
## ✔ infer        1.0.4     ✔ workflows    1.1.2
## ✔ modeldata    1.1.0     ✔ workflowsets 1.0.0
## ✔ parsnip      1.0.3     ✔ yardstick    1.1.0
## ✔ recipes      1.0.4     
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
## • Use suppressPackageStartupMessages() to eliminate package startup messages
library(glmnet) 
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loaded glmnet 4.1-6
library(tidymodels)
library(e1071) 
## 
## Attaching package: 'e1071'
## 
## The following object is masked from 'package:tune':
## 
##     tune
## 
## The following object is masked from 'package:rsample':
## 
##     permutations
## 
## The following object is masked from 'package:parsnip':
## 
##     tune
library(ROCR)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(ggcorrplot)
library(splines)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(leaps)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(skimr) 
library(mice)
## 
## Attaching package: 'mice'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
# Load the training data from train.csv (path is relative to the working
# directory). Column types are guessed by readr, as reported in the message
# that follows.
productfailure <- readr::read_csv(file = "train.csv")
## Rows: 26570 Columns: 26
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): product_code, attribute_0, attribute_1, failure
## dbl (22): id, loading, attribute_2, attribute_3, measurement_0, measurement_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect column names, storage types and leading values of the raw import.
utils::str(object = productfailure)
## spc_tbl_ [26,570 × 26] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ id            : num [1:26570] 0 1 2 3 4 5 6 7 8 9 ...
##  $ product_code  : chr [1:26570] "A" "A" "A" "A" ...
##  $ loading       : num [1:26570] 80.1 84.9 82.4 101.1 188.1 ...
##  $ attribute_0   : chr [1:26570] "material_7" "material_7" "material_7" "material_7" ...
##  $ attribute_1   : chr [1:26570] "material_8" "material_8" "material_8" "material_8" ...
##  $ attribute_2   : num [1:26570] 9 9 9 9 9 9 9 9 9 9 ...
##  $ attribute_3   : num [1:26570] 5 5 5 5 5 5 5 5 5 5 ...
##  $ measurement_0 : num [1:26570] 7 14 12 13 9 11 12 4 9 10 ...
##  $ measurement_1 : num [1:26570] 8 3 1 2 2 4 2 8 6 4 ...
##  $ measurement_2 : num [1:26570] 4 3 5 6 8 0 4 8 5 7 ...
##  $ measurement_3 : num [1:26570] 18 18.2 18.1 17.3 19.3 ...
##  $ measurement_4 : num [1:26570] 12.5 11.5 11.7 11.2 12.9 ...
##  $ measurement_5 : num [1:26570] 15.7 17.7 16.7 18.6 17 ...
##  $ measurement_6 : num [1:26570] 19.3 17.9 18.2 18.3 15.7 ...
##  $ measurement_7 : num [1:26570] 11.7 12.7 12.7 12.6 11.3 ...
##  $ measurement_8 : num [1:26570] 20.2 17.9 18.3 19.1 18.1 ...
##  $ measurement_9 : num [1:26570] 10.7 12.4 12.7 12.5 10.3 ...
##  $ measurement_10: num [1:26570] 15.9 17.9 15.6 16.3 17.1 ...
##  $ measurement_11: num [1:26570] 17.6 17.9 NA 18.4 19.9 ...
##  $ measurement_12: num [1:26570] 15.2 11.8 13.8 10 12.4 ...
##  $ measurement_13: num [1:26570] 15 14.7 16.7 15.2 16.2 ...
##  $ measurement_14: num [1:26570] NA 15.4 18.6 15.6 12.8 ...
##  $ measurement_15: num [1:26570] 13 14.4 14.1 16.2 13.2 ...
##  $ measurement_16: num [1:26570] 14.7 15.6 17.9 17.2 16.4 ...
##  $ measurement_17: num [1:26570] 764 682 663 826 580 ...
##  $ failure       : chr [1:26570] "No" "No" "No" "No" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   id = col_double(),
##   ..   product_code = col_character(),
##   ..   loading = col_double(),
##   ..   attribute_0 = col_character(),
##   ..   attribute_1 = col_character(),
##   ..   attribute_2 = col_double(),
##   ..   attribute_3 = col_double(),
##   ..   measurement_0 = col_double(),
##   ..   measurement_1 = col_double(),
##   ..   measurement_2 = col_double(),
##   ..   measurement_3 = col_double(),
##   ..   measurement_4 = col_double(),
##   ..   measurement_5 = col_double(),
##   ..   measurement_6 = col_double(),
##   ..   measurement_7 = col_double(),
##   ..   measurement_8 = col_double(),
##   ..   measurement_9 = col_double(),
##   ..   measurement_10 = col_double(),
##   ..   measurement_11 = col_double(),
##   ..   measurement_12 = col_double(),
##   ..   measurement_13 = col_double(),
##   ..   measurement_14 = col_double(),
##   ..   measurement_15 = col_double(),
##   ..   measurement_16 = col_double(),
##   ..   measurement_17 = col_double(),
##   ..   failure = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Per-column summary of the raw import; the NA's rows give the number of
# missing values in each numeric column.
base::summary(object = productfailure)
##        id        product_code          loading       attribute_0       
##  Min.   :    0   Length:26570       Min.   : 33.16   Length:26570      
##  1st Qu.: 6642   Class :character   1st Qu.: 99.99   Class :character  
##  Median :13284   Mode  :character   Median :122.39   Mode  :character  
##  Mean   :13284                      Mean   :127.83                     
##  3rd Qu.:19927                      3rd Qu.:149.15                     
##  Max.   :26569                      Max.   :385.86                     
##                                     NA's   :250                        
##  attribute_1         attribute_2     attribute_3   measurement_0   
##  Length:26570       Min.   :5.000   Min.   :5.00   Min.   : 0.000  
##  Class :character   1st Qu.:6.000   1st Qu.:6.00   1st Qu.: 4.000  
##  Mode  :character   Median :6.000   Median :8.00   Median : 7.000  
##                     Mean   :6.754   Mean   :7.24   Mean   : 7.416  
##                     3rd Qu.:8.000   3rd Qu.:8.00   3rd Qu.:10.000  
##                     Max.   :9.000   Max.   :9.00   Max.   :29.000  
##                                                                    
##  measurement_1    measurement_2    measurement_3   measurement_4   
##  Min.   : 0.000   Min.   : 0.000   Min.   :13.97   Min.   : 8.008  
##  1st Qu.: 5.000   1st Qu.: 4.000   1st Qu.:17.12   1st Qu.:11.051  
##  Median : 8.000   Median : 6.000   Median :17.79   Median :11.733  
##  Mean   : 8.233   Mean   : 6.257   Mean   :17.79   Mean   :11.732  
##  3rd Qu.:11.000   3rd Qu.: 8.000   3rd Qu.:18.47   3rd Qu.:12.410  
##  Max.   :29.000   Max.   :24.000   Max.   :21.50   Max.   :16.484  
##                                    NA's   :381     NA's   :538     
##  measurement_5   measurement_6   measurement_7    measurement_8  
##  Min.   :12.07   Min.   :12.71   Min.   : 7.968   Min.   :15.22  
##  1st Qu.:16.44   1st Qu.:16.84   1st Qu.:11.045   1st Qu.:18.34  
##  Median :17.13   Median :17.52   Median :11.712   Median :19.02  
##  Mean   :17.13   Mean   :17.51   Mean   :11.717   Mean   :19.02  
##  3rd Qu.:17.80   3rd Qu.:18.18   3rd Qu.:12.391   3rd Qu.:19.71  
##  Max.   :21.43   Max.   :21.54   Max.   :15.419   Max.   :23.81  
##  NA's   :676     NA's   :796     NA's   :937      NA's   :1048   
##  measurement_9    measurement_10   measurement_11  measurement_12  
##  Min.   : 7.537   Min.   : 9.323   Min.   :12.46   Min.   : 5.167  
##  1st Qu.:10.757   1st Qu.:15.209   1st Qu.:18.17   1st Qu.:10.703  
##  Median :11.430   Median :16.127   Median :19.21   Median :11.717  
##  Mean   :11.431   Mean   :16.118   Mean   :19.17   Mean   :11.703  
##  3rd Qu.:12.102   3rd Qu.:17.025   3rd Qu.:20.21   3rd Qu.:12.709  
##  Max.   :15.412   Max.   :22.479   Max.   :25.64   Max.   :17.663  
##  NA's   :1227     NA's   :1300     NA's   :1468    NA's   :1601    
##  measurement_13  measurement_14  measurement_15   measurement_16  
##  Min.   :10.89   Min.   : 9.14   Min.   : 9.104   Min.   : 9.701  
##  1st Qu.:14.89   1st Qu.:15.06   1st Qu.:13.957   1st Qu.:15.268  
##  Median :15.63   Median :16.04   Median :14.969   Median :16.436  
##  Mean   :15.65   Mean   :16.05   Mean   :14.996   Mean   :16.461  
##  3rd Qu.:16.37   3rd Qu.:17.08   3rd Qu.:16.018   3rd Qu.:17.628  
##  Max.   :22.71   Max.   :22.30   Max.   :21.626   Max.   :24.094  
##  NA's   :1774    NA's   :1874    NA's   :2009     NA's   :2110    
##  measurement_17     failure         
##  Min.   : 196.8   Length:26570      
##  1st Qu.: 619.0   Class :character  
##  Median : 701.0   Mode  :character  
##  Mean   : 701.3                     
##  3rd Qu.: 784.1                     
##  Max.   :1312.8                     
##  NA's   :2284
# Show the first rows of the tibble.
print(productfailure)
## # A tibble: 26,570 × 26
##       id produ…¹ loading attri…² attri…³ attri…⁴ attri…⁵ measu…⁶ measu…⁷ measu…⁸
##    <dbl> <chr>     <dbl> <chr>   <chr>     <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1     0 A          80.1 materi… materi…       9       5       7       8       4
##  2     1 A          84.9 materi… materi…       9       5      14       3       3
##  3     2 A          82.4 materi… materi…       9       5      12       1       5
##  4     3 A         101.  materi… materi…       9       5      13       2       6
##  5     4 A         188.  materi… materi…       9       5       9       2       8
##  6     5 A          75.4 materi… materi…       9       5      11       4       0
##  7     6 A         162.  materi… materi…       9       5      12       2       4
##  8     7 A         178.  materi… materi…       9       5       4       8       8
##  9     8 A         110.  materi… materi…       9       5       9       6       5
## 10     9 A          98.7 materi… materi…       9       5      10       4       7
## # … with 26,560 more rows, 16 more variables: measurement_3 <dbl>,
## #   measurement_4 <dbl>, measurement_5 <dbl>, measurement_6 <dbl>,
## #   measurement_7 <dbl>, measurement_8 <dbl>, measurement_9 <dbl>,
## #   measurement_10 <dbl>, measurement_11 <dbl>, measurement_12 <dbl>,
## #   measurement_13 <dbl>, measurement_14 <dbl>, measurement_15 <dbl>,
## #   measurement_16 <dbl>, measurement_17 <dbl>, failure <chr>, and abbreviated
## #   variable names ¹​product_code, ²​attribute_0, ³​attribute_1, ⁴​attribute_2, …
# Convert the response (failure) and the three categorical predictors from
# character to factor in a single mutate(). as_factor() creates levels in order
# of first appearance, so the resulting level order is the same as converting
# each column in its own mutate() call.
productfailure <- productfailure %>%
  mutate(
    across(c(failure, product_code, attribute_0, attribute_1), as_factor)
  )
# Summarise again after the factor conversion: the converted columns now
# report per-level counts instead of "Class :character".
base::summary(object = productfailure)
##        id        product_code    loading           attribute_0   
##  Min.   :    0   A:5100       Min.   : 33.16   material_7:21320  
##  1st Qu.: 6642   B:5250       1st Qu.: 99.99   material_5: 5250  
##  Median :13284   C:5765       Median :122.39                     
##  Mean   :13284   D:5112       Mean   :127.83                     
##  3rd Qu.:19927   E:5343       3rd Qu.:149.15                     
##  Max.   :26569                Max.   :385.86                     
##                               NA's   :250                        
##      attribute_1     attribute_2     attribute_3   measurement_0   
##  material_8:10865   Min.   :5.000   Min.   :5.00   Min.   : 0.000  
##  material_5:10362   1st Qu.:6.000   1st Qu.:6.00   1st Qu.: 4.000  
##  material_6: 5343   Median :6.000   Median :8.00   Median : 7.000  
##                     Mean   :6.754   Mean   :7.24   Mean   : 7.416  
##                     3rd Qu.:8.000   3rd Qu.:8.00   3rd Qu.:10.000  
##                     Max.   :9.000   Max.   :9.00   Max.   :29.000  
##                                                                    
##  measurement_1    measurement_2    measurement_3   measurement_4   
##  Min.   : 0.000   Min.   : 0.000   Min.   :13.97   Min.   : 8.008  
##  1st Qu.: 5.000   1st Qu.: 4.000   1st Qu.:17.12   1st Qu.:11.051  
##  Median : 8.000   Median : 6.000   Median :17.79   Median :11.733  
##  Mean   : 8.233   Mean   : 6.257   Mean   :17.79   Mean   :11.732  
##  3rd Qu.:11.000   3rd Qu.: 8.000   3rd Qu.:18.47   3rd Qu.:12.410  
##  Max.   :29.000   Max.   :24.000   Max.   :21.50   Max.   :16.484  
##                                    NA's   :381     NA's   :538     
##  measurement_5   measurement_6   measurement_7    measurement_8  
##  Min.   :12.07   Min.   :12.71   Min.   : 7.968   Min.   :15.22  
##  1st Qu.:16.44   1st Qu.:16.84   1st Qu.:11.045   1st Qu.:18.34  
##  Median :17.13   Median :17.52   Median :11.712   Median :19.02  
##  Mean   :17.13   Mean   :17.51   Mean   :11.717   Mean   :19.02  
##  3rd Qu.:17.80   3rd Qu.:18.18   3rd Qu.:12.391   3rd Qu.:19.71  
##  Max.   :21.43   Max.   :21.54   Max.   :15.419   Max.   :23.81  
##  NA's   :676     NA's   :796     NA's   :937      NA's   :1048   
##  measurement_9    measurement_10   measurement_11  measurement_12  
##  Min.   : 7.537   Min.   : 9.323   Min.   :12.46   Min.   : 5.167  
##  1st Qu.:10.757   1st Qu.:15.209   1st Qu.:18.17   1st Qu.:10.703  
##  Median :11.430   Median :16.127   Median :19.21   Median :11.717  
##  Mean   :11.431   Mean   :16.118   Mean   :19.17   Mean   :11.703  
##  3rd Qu.:12.102   3rd Qu.:17.025   3rd Qu.:20.21   3rd Qu.:12.709  
##  Max.   :15.412   Max.   :22.479   Max.   :25.64   Max.   :17.663  
##  NA's   :1227     NA's   :1300     NA's   :1468    NA's   :1601    
##  measurement_13  measurement_14  measurement_15   measurement_16  
##  Min.   :10.89   Min.   : 9.14   Min.   : 9.104   Min.   : 9.701  
##  1st Qu.:14.89   1st Qu.:15.06   1st Qu.:13.957   1st Qu.:15.268  
##  Median :15.63   Median :16.04   Median :14.969   Median :16.436  
##  Mean   :15.65   Mean   :16.05   Mean   :14.996   Mean   :16.461  
##  3rd Qu.:16.37   3rd Qu.:17.08   3rd Qu.:16.018   3rd Qu.:17.628  
##  Max.   :22.71   Max.   :22.30   Max.   :21.626   Max.   :24.094  
##  NA's   :1774    NA's   :1874    NA's   :2009     NA's   :2110    
##  measurement_17   failure    
##  Min.   : 196.8   No :20921  
##  1st Qu.: 619.0   Yes: 5649  
##  Median : 701.0              
##  Mean   : 701.3              
##  3rd Qu.: 784.1              
##  Max.   :1312.8              
##  NA's   :2284
# Confirm that failure, product_code, attribute_0 and attribute_1 are now
# stored as factors.
utils::str(object = productfailure)
## tibble [26,570 × 26] (S3: tbl_df/tbl/data.frame)
##  $ id            : num [1:26570] 0 1 2 3 4 5 6 7 8 9 ...
##  $ product_code  : Factor w/ 5 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ loading       : num [1:26570] 80.1 84.9 82.4 101.1 188.1 ...
##  $ attribute_0   : Factor w/ 2 levels "material_7","material_5": 1 1 1 1 1 1 1 1 1 1 ...
##  $ attribute_1   : Factor w/ 3 levels "material_8","material_5",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ attribute_2   : num [1:26570] 9 9 9 9 9 9 9 9 9 9 ...
##  $ attribute_3   : num [1:26570] 5 5 5 5 5 5 5 5 5 5 ...
##  $ measurement_0 : num [1:26570] 7 14 12 13 9 11 12 4 9 10 ...
##  $ measurement_1 : num [1:26570] 8 3 1 2 2 4 2 8 6 4 ...
##  $ measurement_2 : num [1:26570] 4 3 5 6 8 0 4 8 5 7 ...
##  $ measurement_3 : num [1:26570] 18 18.2 18.1 17.3 19.3 ...
##  $ measurement_4 : num [1:26570] 12.5 11.5 11.7 11.2 12.9 ...
##  $ measurement_5 : num [1:26570] 15.7 17.7 16.7 18.6 17 ...
##  $ measurement_6 : num [1:26570] 19.3 17.9 18.2 18.3 15.7 ...
##  $ measurement_7 : num [1:26570] 11.7 12.7 12.7 12.6 11.3 ...
##  $ measurement_8 : num [1:26570] 20.2 17.9 18.3 19.1 18.1 ...
##  $ measurement_9 : num [1:26570] 10.7 12.4 12.7 12.5 10.3 ...
##  $ measurement_10: num [1:26570] 15.9 17.9 15.6 16.3 17.1 ...
##  $ measurement_11: num [1:26570] 17.6 17.9 NA 18.4 19.9 ...
##  $ measurement_12: num [1:26570] 15.2 11.8 13.8 10 12.4 ...
##  $ measurement_13: num [1:26570] 15 14.7 16.7 15.2 16.2 ...
##  $ measurement_14: num [1:26570] NA 15.4 18.6 15.6 12.8 ...
##  $ measurement_15: num [1:26570] 13 14.4 14.1 16.2 13.2 ...
##  $ measurement_16: num [1:26570] 14.7 15.6 17.9 17.2 16.4 ...
##  $ measurement_17: num [1:26570] 764 682 663 826 580 ...
##  $ failure       : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 2 1 1 ...
# NOTE(review): this repeats the summary() call made right after the factor
# conversion; productfailure is not reassigned in between, so the output is
# the same.
base::summary(object = productfailure)
##        id        product_code    loading           attribute_0   
##  Min.   :    0   A:5100       Min.   : 33.16   material_7:21320  
##  1st Qu.: 6642   B:5250       1st Qu.: 99.99   material_5: 5250  
##  Median :13284   C:5765       Median :122.39                     
##  Mean   :13284   D:5112       Mean   :127.83                     
##  3rd Qu.:19927   E:5343       3rd Qu.:149.15                     
##  Max.   :26569                Max.   :385.86                     
##                               NA's   :250                        
##      attribute_1     attribute_2     attribute_3   measurement_0   
##  material_8:10865   Min.   :5.000   Min.   :5.00   Min.   : 0.000  
##  material_5:10362   1st Qu.:6.000   1st Qu.:6.00   1st Qu.: 4.000  
##  material_6: 5343   Median :6.000   Median :8.00   Median : 7.000  
##                     Mean   :6.754   Mean   :7.24   Mean   : 7.416  
##                     3rd Qu.:8.000   3rd Qu.:8.00   3rd Qu.:10.000  
##                     Max.   :9.000   Max.   :9.00   Max.   :29.000  
##                                                                    
##  measurement_1    measurement_2    measurement_3   measurement_4   
##  Min.   : 0.000   Min.   : 0.000   Min.   :13.97   Min.   : 8.008  
##  1st Qu.: 5.000   1st Qu.: 4.000   1st Qu.:17.12   1st Qu.:11.051  
##  Median : 8.000   Median : 6.000   Median :17.79   Median :11.733  
##  Mean   : 8.233   Mean   : 6.257   Mean   :17.79   Mean   :11.732  
##  3rd Qu.:11.000   3rd Qu.: 8.000   3rd Qu.:18.47   3rd Qu.:12.410  
##  Max.   :29.000   Max.   :24.000   Max.   :21.50   Max.   :16.484  
##                                    NA's   :381     NA's   :538     
##  measurement_5   measurement_6   measurement_7    measurement_8  
##  Min.   :12.07   Min.   :12.71   Min.   : 7.968   Min.   :15.22  
##  1st Qu.:16.44   1st Qu.:16.84   1st Qu.:11.045   1st Qu.:18.34  
##  Median :17.13   Median :17.52   Median :11.712   Median :19.02  
##  Mean   :17.13   Mean   :17.51   Mean   :11.717   Mean   :19.02  
##  3rd Qu.:17.80   3rd Qu.:18.18   3rd Qu.:12.391   3rd Qu.:19.71  
##  Max.   :21.43   Max.   :21.54   Max.   :15.419   Max.   :23.81  
##  NA's   :676     NA's   :796     NA's   :937      NA's   :1048   
##  measurement_9    measurement_10   measurement_11  measurement_12  
##  Min.   : 7.537   Min.   : 9.323   Min.   :12.46   Min.   : 5.167  
##  1st Qu.:10.757   1st Qu.:15.209   1st Qu.:18.17   1st Qu.:10.703  
##  Median :11.430   Median :16.127   Median :19.21   Median :11.717  
##  Mean   :11.431   Mean   :16.118   Mean   :19.17   Mean   :11.703  
##  3rd Qu.:12.102   3rd Qu.:17.025   3rd Qu.:20.21   3rd Qu.:12.709  
##  Max.   :15.412   Max.   :22.479   Max.   :25.64   Max.   :17.663  
##  NA's   :1227     NA's   :1300     NA's   :1468    NA's   :1601    
##  measurement_13  measurement_14  measurement_15   measurement_16  
##  Min.   :10.89   Min.   : 9.14   Min.   : 9.104   Min.   : 9.701  
##  1st Qu.:14.89   1st Qu.:15.06   1st Qu.:13.957   1st Qu.:15.268  
##  Median :15.63   Median :16.04   Median :14.969   Median :16.436  
##  Mean   :15.65   Mean   :16.05   Mean   :14.996   Mean   :16.461  
##  3rd Qu.:16.37   3rd Qu.:17.08   3rd Qu.:16.018   3rd Qu.:17.628  
##  Max.   :22.71   Max.   :22.30   Max.   :21.626   Max.   :24.094  
##  NA's   :1774    NA's   :1874    NA's   :2009     NA's   :2110    
##  measurement_17   failure    
##  Min.   : 196.8   No :20921  
##  1st Qu.: 619.0   Yes: 5649  
##  Median : 701.0              
##  Mean   : 701.3              
##  3rd Qu.: 784.1              
##  Max.   :1312.8              
##  NA's   :2284
# NOTE(review): this repeats the str() call made right after the factor
# conversion; productfailure is not reassigned in between, so the output is
# the same.
utils::str(object = productfailure)
## tibble [26,570 × 26] (S3: tbl_df/tbl/data.frame)
##  $ id            : num [1:26570] 0 1 2 3 4 5 6 7 8 9 ...
##  $ product_code  : Factor w/ 5 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ loading       : num [1:26570] 80.1 84.9 82.4 101.1 188.1 ...
##  $ attribute_0   : Factor w/ 2 levels "material_7","material_5": 1 1 1 1 1 1 1 1 1 1 ...
##  $ attribute_1   : Factor w/ 3 levels "material_8","material_5",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ attribute_2   : num [1:26570] 9 9 9 9 9 9 9 9 9 9 ...
##  $ attribute_3   : num [1:26570] 5 5 5 5 5 5 5 5 5 5 ...
##  $ measurement_0 : num [1:26570] 7 14 12 13 9 11 12 4 9 10 ...
##  $ measurement_1 : num [1:26570] 8 3 1 2 2 4 2 8 6 4 ...
##  $ measurement_2 : num [1:26570] 4 3 5 6 8 0 4 8 5 7 ...
##  $ measurement_3 : num [1:26570] 18 18.2 18.1 17.3 19.3 ...
##  $ measurement_4 : num [1:26570] 12.5 11.5 11.7 11.2 12.9 ...
##  $ measurement_5 : num [1:26570] 15.7 17.7 16.7 18.6 17 ...
##  $ measurement_6 : num [1:26570] 19.3 17.9 18.2 18.3 15.7 ...
##  $ measurement_7 : num [1:26570] 11.7 12.7 12.7 12.6 11.3 ...
##  $ measurement_8 : num [1:26570] 20.2 17.9 18.3 19.1 18.1 ...
##  $ measurement_9 : num [1:26570] 10.7 12.4 12.7 12.5 10.3 ...
##  $ measurement_10: num [1:26570] 15.9 17.9 15.6 16.3 17.1 ...
##  $ measurement_11: num [1:26570] 17.6 17.9 NA 18.4 19.9 ...
##  $ measurement_12: num [1:26570] 15.2 11.8 13.8 10 12.4 ...
##  $ measurement_13: num [1:26570] 15 14.7 16.7 15.2 16.2 ...
##  $ measurement_14: num [1:26570] NA 15.4 18.6 15.6 12.8 ...
##  $ measurement_15: num [1:26570] 13 14.4 14.1 16.2 13.2 ...
##  $ measurement_16: num [1:26570] 14.7 15.6 17.9 17.2 16.4 ...
##  $ measurement_17: num [1:26570] 764 682 663 826 580 ...
##  $ failure       : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 2 1 1 ...
# Compact overview per column type: missing counts, completeness rate and
# distribution statistics for every variable.
skimr::skim(productfailure)
## Data summary
## Name productfailure
## Number of rows 26570
## Number of columns 26
## _______________________
## Column type frequency:
## factor 4
## numeric 22
## ________________________
## Group variables None
##
## Variable type: factor
##
## skim_variable n_missing complete_rate ordered n_unique top_counts
## product_code 0 1 FALSE 5 C: 5765, E: 5343, B: 5250, D: 5112
## attribute_0 0 1 FALSE 2 mat: 21320, mat: 5250
## attribute_1 0 1 FALSE 3 mat: 10865, mat: 10362, mat: 5343
## failure 0 1 FALSE 2 No: 20921, Yes: 5649
##
## Variable type: numeric
##
## skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
## id 0 1.00 13284.50 7670.24 0.00 6642.25 13284.50 19926.75 26569.00 ▇▇▇▇▇
## loading 250 0.99 127.83 39.03 33.16 99.99 122.39 149.15 385.86 ▃▇▂▁▁
## attribute_2 0 1.00 6.75 1.47 5.00 6.00 6.00 8.00 9.00 ▅▇▁▅▃
## attribute_3 0 1.00 7.24 1.46 5.00 6.00 8.00 8.00 9.00 ▃▃▁▇▃
## measurement_0 0 1.00 7.42 4.12 0.00 4.00 7.00 10.00 29.00 ▆▇▂▁▁
## measurement_1 0 1.00 8.23 4.20 0.00 5.00 8.00 11.00 29.00 ▅▇▃▁▁
## measurement_2 0 1.00 6.26 3.31 0.00 4.00 6.00 8.00 24.00 ▅▇▂▁▁
## measurement_3 381 0.99 17.79 1.00 13.97 17.12 17.79 18.47 21.50 ▁▃▇▃▁
## measurement_4 538 0.98 11.73 1.00 8.01 11.05 11.73 12.41 16.48 ▁▅▇▁▁
## measurement_5 676 0.97 17.13 1.00 12.07 16.44 17.13 17.80 21.42 ▁▁▇▃▁
## measurement_6 796 0.97 17.51 1.00 12.71 16.84 17.52 18.18 21.54 ▁▂▇▅▁
## measurement_7 937 0.96 11.72 1.00 7.97 11.04 11.71 12.39 15.42 ▁▃▇▃▁
## measurement_8 1048 0.96 19.02 1.01 15.22 18.34 19.02 19.71 23.81 ▁▅▇▂▁
## measurement_9 1227 0.95 11.43 1.00 7.54 10.76 11.43 12.10 15.41 ▁▃▇▃▁
## measurement_10 1300 0.95 16.12 1.41 9.32 15.21 16.13 17.02 22.48 ▁▂▇▂▁
## measurement_11 1468 0.94 19.17 1.52 12.46 18.17 19.21 20.21 25.64 ▁▂▇▃▁
## measurement_12 1601 0.94 11.70 1.49 5.17 10.70 11.72 12.71 17.66 ▁▂▇▃▁
## measurement_13 1774 0.93 15.65 1.16 10.89 14.89 15.63 16.37 22.71 ▁▇▇▁▁
## measurement_14 1874 0.93 16.05 1.49 9.14 15.06 16.04 17.08 22.30 ▁▂▇▃▁
## measurement_15 2009 0.92 15.00 1.55 9.10 13.96 14.97 16.02 21.63 ▁▃▇▂▁
## measurement_16 2110 0.92 16.46 1.71 9.70 15.27 16.44 17.63 24.09 ▁▃▇▂▁
## measurement_17 2284 0.91 701.27 123.30 196.79 618.96 701.02 784.09 1312.79 ▁▅▇▁▁
# Keep the response plus every predictor, with `failure` moved to the first
# position; the only column left out is `id`. The name vector is built in the
# same order as the explicit list it replaces (attribute_0..3, then
# measurement_0..17).
productfailure <- productfailure %>%
  dplyr::select(
    dplyr::all_of(
      c(
        "failure", "product_code", "loading",
        paste0("attribute_", 0:3),
        paste0("measurement_", 0:17)
      )
    )
  )
# Fix the RNG seed so the imputation is reproducible, then create 5 imputed
# data sets, filling every incomplete column by predictive mean matching.
# NOTE(review): the run logs 768 events in which mice reports (nearly)
# multi-collinear predictors and applies a ridge penalty; the predictor matrix
# may be worth reviewing.
set.seed(1234)
imp_prod <- mice(
  data = productfailure,
  m = 5,
  method = "pmm",
  printFlag = FALSE
)
## Warning: Number of logged events: 768
# Print the imputation set-up: method per column, predictor matrix and the
# head of the logged events.
base::summary(object = imp_prod)
## Class: mids
## Number of multiple imputations:  5 
## Imputation methods:
##        failure   product_code        loading    attribute_0    attribute_1 
##             ""             ""          "pmm"             ""             "" 
##    attribute_2    attribute_3  measurement_0  measurement_1  measurement_2 
##             ""             ""             ""             ""             "" 
##  measurement_3  measurement_4  measurement_5  measurement_6  measurement_7 
##          "pmm"          "pmm"          "pmm"          "pmm"          "pmm" 
##  measurement_8  measurement_9 measurement_10 measurement_11 measurement_12 
##          "pmm"          "pmm"          "pmm"          "pmm"          "pmm" 
## measurement_13 measurement_14 measurement_15 measurement_16 measurement_17 
##          "pmm"          "pmm"          "pmm"          "pmm"          "pmm" 
## PredictorMatrix:
##              failure product_code loading attribute_0 attribute_1 attribute_2
## failure            0            1       1           1           1           1
## product_code       1            0       1           1           1           1
## loading            1            1       0           1           1           1
## attribute_0        1            1       1           0           1           1
## attribute_1        1            1       1           1           0           1
## attribute_2        1            1       1           1           1           0
##              attribute_3 measurement_0 measurement_1 measurement_2
## failure                1             1             1             1
## product_code           1             1             1             1
## loading                1             1             1             1
## attribute_0            1             1             1             1
## attribute_1            1             1             1             1
## attribute_2            1             1             1             1
##              measurement_3 measurement_4 measurement_5 measurement_6
## failure                  1             1             1             1
## product_code             1             1             1             1
## loading                  1             1             1             1
## attribute_0              1             1             1             1
## attribute_1              1             1             1             1
## attribute_2              1             1             1             1
##              measurement_7 measurement_8 measurement_9 measurement_10
## failure                  1             1             1              1
## product_code             1             1             1              1
## loading                  1             1             1              1
## attribute_0              1             1             1              1
## attribute_1              1             1             1              1
## attribute_2              1             1             1              1
##              measurement_11 measurement_12 measurement_13 measurement_14
## failure                   1              1              1              1
## product_code              1              1              1              1
## loading                   1              1              1              1
## attribute_0               1              1              1              1
## attribute_1               1              1              1              1
## attribute_2               1              1              1              1
##              measurement_15 measurement_16 measurement_17
## failure                   1              1              1
## product_code              1              1              1
## loading                   1              1              1
## attribute_0               1              1              1
## attribute_1               1              1              1
## attribute_2               1              1              1
## Number of logged events:  768 
##   it im           dep meth
## 1  1  1       loading  pmm
## 2  1  1       loading  pmm
## 3  1  1 measurement_3  pmm
## 4  1  1 measurement_3  pmm
## 5  1  1 measurement_4  pmm
## 6  1  1 measurement_4  pmm
##                                                                                                                                                                                                                                                        out
## 1                                                                                                                                                                  product_codeC, attribute_0material_5, attribute_1material_6, attribute_3, measurement_1
## 2 mice detected that your data are (nearly) multi-collinear.\nIt applied a ridge penalty to continue calculations, but the results can be unstable.\nDoes your dataset contain duplicates, linear transformation, or factors with unique respondent names?
## 3                                                                                                                                                                        product_codeB, product_codeE, attribute_1material_5, measurement_0, measurement_2
## 4 mice detected that your data are (nearly) multi-collinear.\nIt applied a ridge penalty to continue calculations, but the results can be unstable.\nDoes your dataset contain duplicates, linear transformation, or factors with unique respondent names?
## 5                                                                                                                                                                product_codeB, product_codeC, attribute_0material_5, attribute_1material_5, measurement_0
## 6 mice detected that your data are (nearly) multi-collinear.\nIt applied a ridge penalty to continue calculations, but the results can be unstable.\nDoes your dataset contain duplicates, linear transformation, or factors with unique respondent names?
# Density plot of measurement_4 drawn from the imputation object, used as a
# visual check of the imputed values.
densityplot(x = imp_prod, data = ~ measurement_4)

# Extract one completed data set from the imputation object. action = 1L is
# the complete() default, i.e. the first of the 5 imputations is used.
product_failure_complete <- mice::complete(imp_prod, action = 1L)

# Summarise the completed data to check that the NA's rows are gone.
base::summary(object = product_failure_complete)
##  failure     product_code    loading           attribute_0   
##  No :20921   A:5100       Min.   : 33.16   material_7:21320  
##  Yes: 5649   B:5250       1st Qu.:100.02   material_5: 5250  
##              C:5765       Median :122.46                     
##              D:5112       Mean   :128.01                     
##              E:5343       3rd Qu.:149.32                     
##                           Max.   :385.86                     
##      attribute_1     attribute_2     attribute_3   measurement_0   
##  material_8:10865   Min.   :5.000   Min.   :5.00   Min.   : 0.000  
##  material_5:10362   1st Qu.:6.000   1st Qu.:6.00   1st Qu.: 4.000  
##  material_6: 5343   Median :6.000   Median :8.00   Median : 7.000  
##                     Mean   :6.754   Mean   :7.24   Mean   : 7.416  
##                     3rd Qu.:8.000   3rd Qu.:8.00   3rd Qu.:10.000  
##                     Max.   :9.000   Max.   :9.00   Max.   :29.000  
##  measurement_1    measurement_2    measurement_3   measurement_4   
##  Min.   : 0.000   Min.   : 0.000   Min.   :13.97   Min.   : 8.008  
##  1st Qu.: 5.000   1st Qu.: 4.000   1st Qu.:17.11   1st Qu.:11.054  
##  Median : 8.000   Median : 6.000   Median :17.78   Median :11.735  
##  Mean   : 8.233   Mean   : 6.257   Mean   :17.79   Mean   :11.734  
##  3rd Qu.:11.000   3rd Qu.: 8.000   3rd Qu.:18.46   3rd Qu.:12.411  
##  Max.   :29.000   Max.   :24.000   Max.   :21.50   Max.   :16.484  
##  measurement_5   measurement_6   measurement_7    measurement_8  
##  Min.   :12.07   Min.   :12.71   Min.   : 7.968   Min.   :15.22  
##  1st Qu.:16.39   1st Qu.:16.86   1st Qu.:10.978   1st Qu.:18.27  
##  Median :17.10   Median :17.52   Median :11.686   Median :18.97  
##  Mean   :17.09   Mean   :17.54   Mean   :11.643   Mean   :18.96  
##  3rd Qu.:17.79   3rd Qu.:18.19   3rd Qu.:12.364   3rd Qu.:19.68  
##  Max.   :21.43   Max.   :21.54   Max.   :15.419   Max.   :23.81  
##  measurement_9    measurement_10   measurement_11  measurement_12  
##  Min.   : 7.537   Min.   : 9.323   Min.   :12.46   Min.   : 5.167  
##  1st Qu.:10.738   1st Qu.:15.219   1st Qu.:18.12   1st Qu.:10.534  
##  Median :11.445   Median :16.182   Median :19.23   Median :11.595  
##  Mean   :11.416   Mean   :16.125   Mean   :19.16   Mean   :11.532  
##  3rd Qu.:12.094   3rd Qu.:17.058   3rd Qu.:20.24   3rd Qu.:12.643  
##  Max.   :15.412   Max.   :22.479   Max.   :25.64   Max.   :17.663  
##  measurement_13  measurement_14  measurement_15   measurement_16  
##  Min.   :10.89   Min.   : 9.14   Min.   : 9.104   Min.   : 9.701  
##  1st Qu.:14.95   1st Qu.:15.07   1st Qu.:14.037   1st Qu.:15.199  
##  Median :15.72   Median :16.13   Median :15.057   Median :16.323  
##  Mean   :15.78   Mean   :16.09   Mean   :15.042   Mean   :16.311  
##  3rd Qu.:16.54   3rd Qu.:17.13   3rd Qu.:16.014   3rd Qu.:17.514  
##  Max.   :22.71   Max.   :22.30   Max.   :21.626   Max.   :24.094  
##  measurement_17  
##  Min.   : 196.8  
##  1st Qu.: 628.3  
##  Median : 715.1  
##  Mean   : 734.1  
##  3rd Qu.: 813.4  
##  Max.   :1312.8
# Compact structure listing: column types and the first few values.
utils::str(product_failure_complete)
## 'data.frame':    26570 obs. of  25 variables:
##  $ failure       : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 2 1 1 ...
##  $ product_code  : Factor w/ 5 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ loading       : num  80.1 84.9 82.4 101.1 188.1 ...
##  $ attribute_0   : Factor w/ 2 levels "material_7","material_5": 1 1 1 1 1 1 1 1 1 1 ...
##  $ attribute_1   : Factor w/ 3 levels "material_8","material_5",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ attribute_2   : num  9 9 9 9 9 9 9 9 9 9 ...
##  $ attribute_3   : num  5 5 5 5 5 5 5 5 5 5 ...
##  $ measurement_0 : num  7 14 12 13 9 11 12 4 9 10 ...
##  $ measurement_1 : num  8 3 1 2 2 4 2 8 6 4 ...
##  $ measurement_2 : num  4 3 5 6 8 0 4 8 5 7 ...
##  $ measurement_3 : num  18 18.2 18.1 17.3 19.3 ...
##  $ measurement_4 : num  12.5 11.5 11.7 11.2 12.9 ...
##  $ measurement_5 : num  15.7 17.7 16.7 18.6 17 ...
##  $ measurement_6 : num  19.3 17.9 18.2 18.3 15.7 ...
##  $ measurement_7 : num  11.7 12.7 12.7 12.6 11.3 ...
##  $ measurement_8 : num  20.2 17.9 18.3 19.1 18.1 ...
##  $ measurement_9 : num  10.7 12.4 12.7 12.5 10.3 ...
##  $ measurement_10: num  15.9 17.9 15.6 16.3 17.1 ...
##  $ measurement_11: num  17.6 17.9 16.2 18.4 19.9 ...
##  $ measurement_12: num  15.2 11.8 13.8 10 12.4 ...
##  $ measurement_13: num  15 14.7 16.7 15.2 16.2 ...
##  $ measurement_14: num  14.7 15.4 18.6 15.6 12.8 ...
##  $ measurement_15: num  13 14.4 14.1 16.2 13.2 ...
##  $ measurement_16: num  14.7 15.6 17.9 17.2 16.4 ...
##  $ measurement_17: num  764 682 663 826 580 ...
# Per-column skim report (missingness, quantiles, inline histograms).
skimr::skim(product_failure_complete)
Data summary
Name product_failure_complete
Number of rows 26570
Number of columns 25
_______________________
Column type frequency:
factor 4
numeric 21
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
failure 0 1 FALSE 2 No: 20921, Yes: 5649
product_code 0 1 FALSE 5 C: 5765, E: 5343, B: 5250, D: 5112
attribute_0 0 1 FALSE 2 mat: 21320, mat: 5250
attribute_1 0 1 FALSE 3 mat: 10865, mat: 10362, mat: 5343

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
loading 0 1 128.01 39.20 33.16 100.02 122.46 149.32 385.86 ▃▇▂▁▁
attribute_2 0 1 6.75 1.47 5.00 6.00 6.00 8.00 9.00 ▅▇▁▅▃
attribute_3 0 1 7.24 1.46 5.00 6.00 8.00 8.00 9.00 ▃▃▁▇▃
measurement_0 0 1 7.42 4.12 0.00 4.00 7.00 10.00 29.00 ▆▇▂▁▁
measurement_1 0 1 8.23 4.20 0.00 5.00 8.00 11.00 29.00 ▅▇▃▁▁
measurement_2 0 1 6.26 3.31 0.00 4.00 6.00 8.00 24.00 ▅▇▂▁▁
measurement_3 0 1 17.79 1.00 13.97 17.11 17.78 18.46 21.50 ▁▃▇▃▁
measurement_4 0 1 11.73 1.00 8.01 11.05 11.73 12.41 16.48 ▁▅▇▁▁
measurement_5 0 1 17.09 1.01 12.07 16.39 17.10 17.78 21.42 ▁▂▇▃▁
measurement_6 0 1 17.54 1.02 12.71 16.86 17.52 18.19 21.54 ▁▁▇▅▁
measurement_7 0 1 11.64 1.08 7.97 10.98 11.69 12.36 15.42 ▁▃▇▃▁
measurement_8 0 1 18.96 1.05 15.22 18.27 18.97 19.68 23.81 ▁▆▇▂▁
measurement_9 0 1 11.42 1.00 7.54 10.74 11.45 12.09 15.41 ▁▃▇▃▁
measurement_10 0 1 16.13 1.40 9.32 15.22 16.18 17.06 22.48 ▁▂▇▂▁
measurement_11 0 1 19.16 1.58 12.46 18.12 19.23 20.24 25.64 ▁▂▇▃▁
measurement_12 0 1 11.53 1.65 5.17 10.53 11.60 12.64 17.66 ▁▂▇▃▁
measurement_13 0 1 15.78 1.23 10.89 14.95 15.72 16.54 22.71 ▁▇▇▁▁
measurement_14 0 1 16.09 1.47 9.14 15.07 16.13 17.13 22.30 ▁▂▇▃▁
measurement_15 0 1 15.04 1.51 9.10 14.04 15.06 16.01 21.63 ▁▃▇▂▁
measurement_16 0 1 16.31 1.79 9.70 15.20 16.32 17.51 24.09 ▁▃▇▂▁
measurement_17 0 1 734.15 163.43 196.79 628.26 715.09 813.43 1312.79 ▁▅▇▂▁
# Split the completed data into four narrower frames, each keeping `failure`,
# for the pairwise plots and correlation charts that follow.
productfailure1 <- product_failure_complete %>%
  dplyr::select(
    failure, loading,
    attribute_0, attribute_1, attribute_2, attribute_3
  )
productfailure2 <- product_failure_complete %>%
  dplyr::select(
    failure,
    measurement_0, measurement_1, measurement_2,
    measurement_3, measurement_4, measurement_5
  )
productfailure3 <- product_failure_complete %>%
  dplyr::select(
    failure,
    measurement_6, measurement_7, measurement_8,
    measurement_9, measurement_10, measurement_11
  )
productfailure4 <- product_failure_complete %>%
  dplyr::select(
    failure,
    measurement_12, measurement_13, measurement_14,
    measurement_15, measurement_16, measurement_17
  )
productfailure1 %>% summary()
##  failure        loading           attribute_0        attribute_1   
##  No :20921   Min.   : 33.16   material_7:21320   material_8:10865  
##  Yes: 5649   1st Qu.:100.02   material_5: 5250   material_5:10362  
##              Median :122.46                      material_6: 5343  
##              Mean   :128.01                                        
##              3rd Qu.:149.32                                        
##              Max.   :385.86                                        
##   attribute_2     attribute_3  
##  Min.   :5.000   Min.   :5.00  
##  1st Qu.:6.000   1st Qu.:6.00  
##  Median :6.000   Median :8.00  
##  Mean   :6.754   Mean   :7.24  
##  3rd Qu.:8.000   3rd Qu.:8.00  
##  Max.   :9.000   Max.   :9.00
# Pairwise plot matrix of every column in productfailure1.
ggpairs(data = productfailure1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Column-wise summary statistics for productfailure2.
productfailure2 %>% summary()
##  failure     measurement_0    measurement_1    measurement_2    measurement_3  
##  No :20921   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   :13.97  
##  Yes: 5649   1st Qu.: 4.000   1st Qu.: 5.000   1st Qu.: 4.000   1st Qu.:17.11  
##              Median : 7.000   Median : 8.000   Median : 6.000   Median :17.78  
##              Mean   : 7.416   Mean   : 8.233   Mean   : 6.257   Mean   :17.79  
##              3rd Qu.:10.000   3rd Qu.:11.000   3rd Qu.: 8.000   3rd Qu.:18.46  
##              Max.   :29.000   Max.   :29.000   Max.   :24.000   Max.   :21.50  
##  measurement_4    measurement_5  
##  Min.   : 8.008   Min.   :12.07  
##  1st Qu.:11.054   1st Qu.:16.39  
##  Median :11.735   Median :17.10  
##  Mean   :11.734   Mean   :17.09  
##  3rd Qu.:12.411   3rd Qu.:17.79  
##  Max.   :16.484   Max.   :21.43
# Pairwise plot matrix of every column in productfailure2.
ggpairs(data = productfailure2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Column-wise summary statistics for productfailure3.
productfailure3 %>% summary()
##  failure     measurement_6   measurement_7    measurement_8   measurement_9   
##  No :20921   Min.   :12.71   Min.   : 7.968   Min.   :15.22   Min.   : 7.537  
##  Yes: 5649   1st Qu.:16.86   1st Qu.:10.978   1st Qu.:18.27   1st Qu.:10.738  
##              Median :17.52   Median :11.686   Median :18.97   Median :11.445  
##              Mean   :17.54   Mean   :11.643   Mean   :18.96   Mean   :11.416  
##              3rd Qu.:18.19   3rd Qu.:12.364   3rd Qu.:19.68   3rd Qu.:12.094  
##              Max.   :21.54   Max.   :15.419   Max.   :23.81   Max.   :15.412  
##  measurement_10   measurement_11 
##  Min.   : 9.323   Min.   :12.46  
##  1st Qu.:15.219   1st Qu.:18.12  
##  Median :16.182   Median :19.23  
##  Mean   :16.125   Mean   :19.16  
##  3rd Qu.:17.058   3rd Qu.:20.24  
##  Max.   :22.479   Max.   :25.64
# Pairwise plot matrix of every column in productfailure3.
ggpairs(data = productfailure3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Column-wise summary statistics for productfailure4.
productfailure4 %>% summary()
##  failure     measurement_12   measurement_13  measurement_14  measurement_15  
##  No :20921   Min.   : 5.167   Min.   :10.89   Min.   : 9.14   Min.   : 9.104  
##  Yes: 5649   1st Qu.:10.534   1st Qu.:14.95   1st Qu.:15.07   1st Qu.:14.037  
##              Median :11.595   Median :15.72   Median :16.13   Median :15.057  
##              Mean   :11.532   Mean   :15.78   Mean   :16.09   Mean   :15.042  
##              3rd Qu.:12.643   3rd Qu.:16.54   3rd Qu.:17.13   3rd Qu.:16.014  
##              Max.   :17.663   Max.   :22.71   Max.   :22.30   Max.   :21.626  
##  measurement_16   measurement_17  
##  Min.   : 9.701   Min.   : 196.8  
##  1st Qu.:15.199   1st Qu.: 628.3  
##  Median :16.323   Median : 715.1  
##  Mean   :16.311   Mean   : 734.1  
##  3rd Qu.:17.514   3rd Qu.: 813.4  
##  Max.   :24.094   Max.   :1312.8
# Pairwise plot matrix of every column in productfailure4.
ggpairs(data = productfailure4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Correlation chart for productfailure1 with coefficients printed to two
# decimals. `label` is an on/off flag, so pass the logical TRUE instead of the
# string "true" (which only worked through implicit coercion).
ggcorr(productfailure1, label = TRUE, label_round = 2)
## Warning in ggcorr(productfailure1, label = "true", label_round = 2): data in
## column(s) 'failure', 'attribute_0', 'attribute_1' are not numeric and were
## ignored

# Correlation chart for productfailure2 with coefficients printed to two
# decimals. `label` is an on/off flag, so pass the logical TRUE instead of the
# string "true" (which only worked through implicit coercion).
ggcorr(productfailure2, label = TRUE, label_round = 2)
## Warning in ggcorr(productfailure2, label = "true", label_round = 2): data in
## column(s) 'failure' are not numeric and were ignored

# Correlation chart for productfailure3 with coefficients printed to two
# decimals. `label` is an on/off flag, so pass the logical TRUE instead of the
# string "true" (which only worked through implicit coercion).
ggcorr(productfailure3, label = TRUE, label_round = 2)
## Warning in ggcorr(productfailure3, label = "true", label_round = 2): data in
## column(s) 'failure' are not numeric and were ignored

# Correlation chart for productfailure4 with coefficients printed to two
# decimals. `label` is an on/off flag, so pass the logical TRUE instead of the
# string "true" (which only worked through implicit coercion).
ggcorr(productfailure4, label = TRUE, label_round = 2)
## Warning in ggcorr(productfailure4, label = "true", label_round = 2): data in
## column(s) 'failure' are not numeric and were ignored

# Correlation chart across the full completed data set with coefficients
# printed to two decimals. `label` is an on/off flag, so pass the logical TRUE
# instead of the string "true" (which only worked through implicit coercion).
ggcorr(product_failure_complete, label = TRUE, label_round = 2)
## Warning in ggcorr(product_failure_complete, label = "true", label_round = 2):
## data in column(s) 'failure', 'product_code', 'attribute_0', 'attribute_1' are
## not numeric and were ignored

# Share of failures across the range of `loading`.
# `loading` is continuous, so it is binned with a histogram: geom_bar() counts
# each distinct x value separately and would draw one sliver per observation.
ggplot(product_failure_complete, aes(x = loading, fill = failure)) +
  geom_histogram(position = "fill", bins = 30) +
  theme_bw()

# Crosstab of failure by product code, as column-wise proportions
# (margin = 2 normalises within each product code).
t1 <- table(
  product_failure_complete$failure,
  product_failure_complete$product_code
)
prop.table(t1, margin = 2)
##      
##               A         B         C         D         E
##   No  0.7727451 0.7996190 0.7883781 0.7824726 0.7930002
##   Yes 0.2272549 0.2003810 0.2116219 0.2175274 0.2069998
 # Stacked proportion bars: failure share within each product code.
 product_failure_complete %>%
   ggplot(aes(x = product_code, fill = failure)) +
   geom_bar(position = "fill") +
   theme_bw()

 # Same relationship as numbers: proportions within each product code column.
 t1 <- table(
   product_failure_complete[["failure"]],
   product_failure_complete[["product_code"]]
 )
 prop.table(t1, margin = 2)
##      
##               A         B         C         D         E
##   No  0.7727451 0.7996190 0.7883781 0.7824726 0.7930002
##   Yes 0.2272549 0.2003810 0.2116219 0.2175274 0.2069998
# Stacked proportion bars: failure share at each attribute_2 value.
product_failure_complete %>%
  ggplot(aes(x = attribute_2, fill = failure)) +
  geom_bar(position = "fill") +
  theme_bw()

# Same relationship as numbers: proportions within each attribute_2 column.
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["attribute_2"]]
)
prop.table(t1, margin = 2)
##      
##               5         6         8         9
##   No  0.7883781 0.7878527 0.7996190 0.7727451
##   Yes 0.2116219 0.2121473 0.2003810 0.2272549
# Stacked proportion bars: failure share within each attribute_0 material.
product_failure_complete %>%
  ggplot(aes(x = attribute_0, fill = failure)) +
  geom_bar(position = "fill") +
  theme_bw()

# Same relationship as numbers: proportions within each attribute_0 column.
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["attribute_0"]]
)
prop.table(t1, margin = 2)
##      
##       material_7 material_5
##   No   0.7843809  0.7996190
##   Yes  0.2156191  0.2003810
# Stacked proportion bars: failure share within each attribute_1 material.
product_failure_complete %>%
  ggplot(aes(x = attribute_1, fill = failure)) +
  geom_bar(position = "fill") +
  theme_bw()

# Same relationship as numbers: proportions within each attribute_1 column.
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["attribute_1"]]
)
prop.table(t1, margin = 2)
##      
##       material_8 material_5 material_6
##   No   0.7810400  0.7911600  0.7930002
##   Yes  0.2189600  0.2088400  0.2069998
# Stacked proportion bars: failure share at each attribute_3 value.
product_failure_complete %>%
  ggplot(aes(x = attribute_3, fill = failure)) +
  geom_bar(position = "fill") +
  theme_bw()

# Same relationship as numbers: proportions within each attribute_3 column.
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["attribute_3"]]
)
prop.table(t1, margin = 2)
##      
##               5         6         8         9
##   No  0.7727451 0.7824726 0.7937358 0.7930002
##   Yes 0.2272549 0.2175274 0.2062642 0.2069998
# Stacked proportion bars: failure share at each measurement_0 value.
product_failure_complete %>%
  ggplot(aes(x = measurement_0, fill = failure)) +
  geom_bar(position = "fill") +
  theme_bw()

# Same relationship as numbers: proportions within each measurement_0 column.
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["measurement_0"]]
)
prop.table(t1, margin = 2)
##      
##               0         1         2         3         4         5         6
##   No  0.8159851 0.7834793 0.7800797 0.8018610 0.7864035 0.7876209 0.7819602
##   Yes 0.1840149 0.2165207 0.2199203 0.1981390 0.2135965 0.2123791 0.2180398
##      
##               7         8         9        10        11        12        13
##   No  0.7844796 0.7928368 0.8037699 0.7884058 0.7682495 0.7877095 0.7890724
##   Yes 0.2155204 0.2071632 0.1962301 0.2115942 0.2317505 0.2122905 0.2109276
##      
##              14        15        16        17        18        19        20
##   No  0.7839506 0.8004386 0.7520891 0.7423581 0.7839196 0.6991150 0.7848101
##   Yes 0.2160494 0.1995614 0.2479109 0.2576419 0.2160804 0.3008850 0.2151899
##      
##              21        22        23        24        25        26        27
##   No  0.8113208 0.8684211 0.7647059 0.7083333 0.5714286 1.0000000 1.0000000
##   Yes 0.1886792 0.1315789 0.2352941 0.2916667 0.4285714 0.0000000 0.0000000
##      
##              29
##   No  1.0000000
##   Yes 0.0000000
# Stacked proportion bars: failure share at each measurement_1 value.
product_failure_complete %>%
  ggplot(aes(x = measurement_1, fill = failure)) +
  geom_bar(position = "fill") +
  theme_bw()

# Same relationship as numbers: proportions within each measurement_1 column.
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["measurement_1"]]
)
prop.table(t1, margin = 2)
##      
##               0         1         2         3         4         5         6
##   No  0.7671958 0.7411945 0.8031496 0.7930781 0.7766821 0.7864955 0.7827368
##   Yes 0.2328042 0.2588055 0.1968504 0.2069219 0.2233179 0.2135045 0.2172632
##      
##               7         8         9        10        11        12        13
##   No  0.7825235 0.7893724 0.7899873 0.7793202 0.7973046 0.8000000 0.8057296
##   Yes 0.2174765 0.2106276 0.2100127 0.2206798 0.2026954 0.2000000 0.1942704
##      
##              14        15        16        17        18        19        20
##   No  0.7892157 0.7927786 0.8004246 0.7634069 0.8018868 0.7810651 0.7659574
##   Yes 0.2107843 0.2072214 0.1995754 0.2365931 0.1981132 0.2189349 0.2340426
##      
##              21        22        23        24        25        26        27
##   No  0.7692308 0.8750000 0.8235294 0.7333333 0.8750000 1.0000000 0.5000000
##   Yes 0.2307692 0.1250000 0.1764706 0.2666667 0.1250000 0.0000000 0.5000000
##      
##              28        29
##   No  1.0000000 0.0000000
##   Yes 0.0000000 1.0000000
# Stacked proportion bars: failure share at each measurement_2 value.
product_failure_complete %>%
  ggplot(aes(x = measurement_2, fill = failure)) +
  geom_bar(position = "fill") +
  theme_bw()

# Same relationship as numbers: proportions within each measurement_2 column.
t1 <- table(
  product_failure_complete[["failure"]],
  product_failure_complete[["measurement_2"]]
)
prop.table(t1, margin = 2)
##      
##               0         1         2         3         4         5         6
##   No  0.7799043 0.7973856 0.8044597 0.7808511 0.7871846 0.7960506 0.7984152
##   Yes 0.2200957 0.2026144 0.1955403 0.2191489 0.2128154 0.2039494 0.2015848
##      
##               7         8         9        10        11        12        13
##   No  0.7752420 0.7793944 0.7856000 0.7972696 0.8030457 0.7717087 0.7522124
##   Yes 0.2247580 0.2206056 0.2144000 0.2027304 0.1969543 0.2282913 0.2477876
##      
##              14        15        16        17        18        19        20
##   No  0.7713178 0.7414966 0.7472527 0.7014925 0.7567568 0.6842105 0.6250000
##   Yes 0.2286822 0.2585034 0.2527473 0.2985075 0.2432432 0.3157895 0.3750000
##      
##              21        22        23        24
##   No  0.6250000 0.0000000 1.0000000 0.5000000
##   Yes 0.3750000 1.0000000 0.0000000 0.5000000
# Share of failures across the range of each continuous measurement
# (measurement_3 through measurement_17), one plot per column.
# These columns hold real-valued readings, so they are binned with a
# histogram: geom_bar() counts each distinct x value separately and would draw
# one sliver per observation. A single loop replaces fifteen copy-pasted calls.
for (measurement in paste0("measurement_", 3:17)) {
  failure_share_plot <- ggplot(
    product_failure_complete,
    # .data[[...]] looks the column up by its string name.
    aes(x = .data[[measurement]], fill = failure)
  ) +
    geom_histogram(position = "fill", bins = 30) +
    labs(x = measurement) + # keep the column name as the axis title
    theme_bw()

  # Plots created inside a loop are not auto-printed; print explicitly.
  print(failure_share_plot)
}